# Replication materials ---------------------------------------------------
# Paper:    Campaigns and the Selection of Policy-seeking Representatives.
#           Legislative Studies Quarterly
# Authors: Shaun Bowler, Gail McElroy, Stefan Müller

#--------------------------------------------------------------------------
# Script for creating master data (European Parliament 6, 2004-2009)
#--------------------------------------------------------------------------

# Load necessary packages -------------------------------------------------
library(car)
library(readstata13)
library(tidyverse)

# Import original files into R environment -----------------------------

# Load the two Stata inputs for EP6: the MEP legislative activity file and the
# survey answers.
EP6 <- readstata13::read.dta13(file = "data_ep_2004-2009/ep_6_activity.dta")

# NOTE: THIS FILE CANNOT BE INCLUDED IN THE REPLICATION MATERIAL
Survey2006 <- readstata13::read.dta13(
  file = "data_ep_2004-2009/Survey2006_ID_merged_reduced_NAME.dta"
)

# Flag every row of the survey file as a survey respondent. The flag is stored
# as the string "1"; rows without a survey entry receive NA once the survey is
# merged onto the activity data.
Survey2006$respondent <- "1"

# Recode q1_5b into the `incumbent` dummy: codes 1-7 become 0, every other
# observed code becomes 1, and missing answers stay missing.
# The specification previously used the quoted string 'NA'. car::recode treats
# a quoted 'NA' as a literal value rather than as a missing value, so missing
# answers fell through to `else` and were coded 1. Writing NA unquoted keeps
# them missing.
# NOTE(review): this changes `incumbent` from 1 to NA for respondents with a
# missing q1_5b, compared with output produced by the earlier specification.
Survey2006$incumbent <- car::recode(Survey2006$q1_5b, "1:7=0; NA=NA; else=1")

# Electoral system of each country in EP6.
# Coding of "electoral_system04": 1 = closed lists; 2 = preferential; 3 = STV
# Northern Ireland ("northern ireland") is treated separately because it uses
# STV while the rest of the UK uses closed lists. The "country" value of the
# three MEPs from Northern Ireland was therefore changed from "united kingdom"
# to "northern ireland".
EP6electoralsystems <- utils::read.csv(
  file = "data_ep_2004-2009/ep_6_electoralsystems.csv"
)

# Seats of each party group
EP6partygroups <- utils::read.csv(
  file = "data_ep_2004-2009/ep_6_partygroups.csv",
  fileEncoding = "utf-8"
)

# In the EP6 dataset an MEP appears in more than one row if they took part in
# more than one committee. The variables of interest (legislative activity)
# are the same in each of those rows, so only the first row per "name" is kept.
first_row_per_name <- !duplicated(EP6["name"])
EP6_reduced <- subset(EP6, first_row_per_name)

# Turn the committee and party group position labels into 0/1 factors:
# "Member" -> 0, empty string -> NA, every other value -> 1.
position_spec <- "'Member'=0; ''=NA; else=1"
EP6_reduced$committee_chair <- as.factor(
  car::recode(EP6_reduced$comm_pos, position_spec)
)
EP6_reduced$partypos <- as.factor(
  car::recode(EP6_reduced$EPgrouppos, position_spec)
)

# Some MEPs are also included more than once in the Survey2006 dataset.
# Keep the first row for each value of "b_2".
# (Original author's note: the number of observations is the same whether
# "b_2" or "name" is used to identify duplicates.)
first_row_per_b2 <- !duplicated(Survey2006["b_2"])
Survey2006_reduced <- subset(Survey2006, first_row_per_b2)


# Merge datasets -------------------------------------------------------

# Attach the survey answers to the de-duplicated activity data, matching on
# "name". EP6_reduced is the left-hand (base) table of the left join, so every
# MEP row is kept and MEPs without a survey entry get NA in the survey columns.
EP6_Survey2006_reduced <- EP6_reduced %>%
  dplyr::left_join(Survey2006_reduced, by = "name")

# Add the electoral system of each MEP's country
EP6_Survey2006_electoralsystems <- EP6_Survey2006_reduced %>%
  dplyr::left_join(EP6electoralsystems, by = "country")

# Add party group seats and seat shares
# (based on Hix and Hoyland, 2011: 142)
EP6_Survey_2006 <- EP6_Survey2006_electoralsystems %>%
  dplyr::left_join(EP6partygroups, by = "polgroup")


# Recode variables and create campaign index -------------------------------

# Binary "used" indicators for the q9_6_* items: answer codes 1-3 become 1 and
# code 4 becomes 0. Values not named in the specification are not recoded.
recode_activity_dummy <- function(answer) {
  car::recode(answer, "1:3=1; 4=0")
}

# Classic campaigning
EP6_Survey_2006$use_telephone <- recode_activity_dummy(EP6_Survey_2006$q9_6_1)
EP6_Survey_2006$use_doortodoor <- recode_activity_dummy(EP6_Survey_2006$q9_6_2)
EP6_Survey_2006$use_mailing <- recode_activity_dummy(EP6_Survey_2006$q9_6_3)
EP6_Survey_2006$use_partymeetings <-
  recode_activity_dummy(EP6_Survey_2006$q9_6_4)
EP6_Survey_2006$use_publicmeetings <-
  recode_activity_dummy(EP6_Survey_2006$q9_6_5)
EP6_Survey_2006$use_pressconferences <-
  recode_activity_dummy(EP6_Survey_2006$q9_6_6)
EP6_Survey_2006$use_mediarelations <-
  recode_activity_dummy(EP6_Survey_2006$q9_6_7)
EP6_Survey_2006$use_fundraising <-
  recode_activity_dummy(EP6_Survey_2006$q9_6_11)

# Post-modern campaigning
EP6_Survey_2006$use_website <- recode_activity_dummy(EP6_Survey_2006$q9_6_8)
EP6_Survey_2006$use_email <- recode_activity_dummy(EP6_Survey_2006$q9_6_9)
EP6_Survey_2006$use_blog <- recode_activity_dummy(EP6_Survey_2006$q9_6_10)

# Promotion material (q9_7_* items): code 1 stays 1 and a missing value
# becomes 0, so rows without an answer count as "not used".
recode_material_dummy <- function(answer) {
  car::recode(answer, "1=1;NA=0")
}

EP6_Survey_2006$use_newsletters <- recode_material_dummy(EP6_Survey_2006$q9_7_1)
EP6_Survey_2006$use_posters <- recode_material_dummy(EP6_Survey_2006$q9_7_2)
EP6_Survey_2006$use_stickers <- recode_material_dummy(EP6_Survey_2006$q9_7_5)
EP6_Survey_2006$use_radioads <- recode_material_dummy(EP6_Survey_2006$q9_7_6)
EP6_Survey_2006$use_tvads <- recode_material_dummy(EP6_Survey_2006$q9_7_7)
EP6_Survey_2006$use_newspaperads <-
  recode_material_dummy(EP6_Survey_2006$q9_7_8)


# Create additive indices
# Each index is the row-wise sum of the binary "used" indicators in its group.

# Classic campaigning: missing items are skipped, so a row whose items are all
# missing sums to 0.
classic_items <- c(
  "use_telephone", "use_doortodoor", "use_partymeetings", "use_mailing",
  "use_publicmeetings", "use_pressconferences", "use_mediarelations",
  "use_fundraising"
)
EP6_Survey_2006$use_classic <- rowSums(
  EP6_Survey_2006[, classic_items],
  na.rm = TRUE
)

# Post-modern campaigning: missing items are skipped as well
postmodern_items <- c("use_website", "use_email", "use_blog")
EP6_Survey_2006$use_postmodern <- as.numeric(
  rowSums(EP6_Survey_2006[, postmodern_items], na.rm = TRUE)
)

# Promotion material: here a missing item makes the whole row sum missing
# (na.rm = FALSE)
material_items <- c(
  "use_newsletters", "use_posters", "use_stickers",
  "use_radioads", "use_tvads", "use_newspaperads"
)
EP6_Survey_2006$use_material <- as.numeric(
  rowSums(EP6_Survey_2006[, material_items], na.rm = FALSE)
)

# Overall campaign index: sum of the three sub-indices
campaign_parts <- c("use_classic", "use_postmodern", "use_material")
EP6_Survey_2006$use_campaign <- rowSums(
  EP6_Survey_2006[, campaign_parts],
  na.rm = TRUE
)


# Recode variables for ordinal index --------------------------------------

# Reverse the q9_6_* answer codes so that larger numbers mean the larger
# original code was NOT chosen: 4 -> 0, 3 -> 1, 2 -> 2, 1 -> 3.
recode_ordinal <- function(answer) {
  car::recode(answer, "4=0; 3=1; 2=2; 1=3")
}

EP6_Survey_2006$use_ord_telephone <- recode_ordinal(EP6_Survey_2006$q9_6_1)
EP6_Survey_2006$use_ord_doortodoor <- recode_ordinal(EP6_Survey_2006$q9_6_2)
EP6_Survey_2006$use_ord_mailing <- recode_ordinal(EP6_Survey_2006$q9_6_3)
EP6_Survey_2006$use_ord_partymeetings <- recode_ordinal(EP6_Survey_2006$q9_6_4)
EP6_Survey_2006$use_ord_publicmeetings <- recode_ordinal(EP6_Survey_2006$q9_6_5)
EP6_Survey_2006$use_ord_pressconferences <-
  recode_ordinal(EP6_Survey_2006$q9_6_6)
EP6_Survey_2006$use_ord_mediarelations <- recode_ordinal(EP6_Survey_2006$q9_6_7)
EP6_Survey_2006$use_ord_fundraising <- recode_ordinal(EP6_Survey_2006$q9_6_11)
# Website
EP6_Survey_2006$use_ord_website <- recode_ordinal(EP6_Survey_2006$q9_6_8)
# Direct mail (the same item, q9_6_9, is named use_email in the binary recode)
EP6_Survey_2006$use_ord_mail <- recode_ordinal(EP6_Survey_2006$q9_6_9)
# Weblog
EP6_Survey_2006$use_ord_blog <- recode_ordinal(EP6_Survey_2006$q9_6_10)

# Create additive index (ordinal): row-wise sum of all eleven recoded items,
# skipping missing items
ordinal_items <- c(
  "use_ord_telephone", "use_ord_doortodoor", "use_ord_partymeetings",
  "use_ord_mailing", "use_ord_publicmeetings", "use_ord_pressconferences",
  "use_ord_mediarelations", "use_ord_fundraising", "use_ord_website",
  "use_ord_mail", "use_ord_blog"
)
EP6_Survey_2006$use_ord_total <- rowSums(
  EP6_Survey_2006[, ordinal_items],
  na.rm = TRUE
)


# Recode and generate legislative activity variables ----------------------

# Copy the raw activity measures into consistently named columns.
# The order of these statements matters: later lines read columns created
# here, and `$` on a data frame allows partial name matching.
EP6_Survey_2006$vote_activity <- EP6_Survey_2006$activity * 100 # to get percent
EP6_Survey_2006$questions <- EP6_Survey_2006$Questions
EP6_Survey_2006$questions_qtime <- EP6_Survey_2006$parlq_vwquestions # correct variable?
# NOTE(review): this assigns the column to itself. If a column named exactly
# "reports" already exists, the line changes nothing. Because `$` partially
# matches names, it may instead be copying a longer-named column that starts
# with "reports" -- confirm against the activity file before removing it.
EP6_Survey_2006$reports <- EP6_Survey_2006$reports
EP6_Survey_2006$opinions <- EP6_Survey_2006$opinions_vw
EP6_Survey_2006$motions <- EP6_Survey_2006$motions_vw

# Total legislative activity: row-wise sum of the five measures, skipping
# missing values. The sum refers to "motions_vw" directly; it holds the same
# values as the "motions" column created above.
EP6_Survey_2006$leg_activity <- rowSums(EP6_Survey_2006[,c("questions",
                                                           "questions_qtime",
                                                            "reports",
                                                           "opinions",
                                                           "motions_vw")], na.rm=TRUE)

# Generate and recode further variables -----------------------------------

# Recode number of months served in European Parliament
months_in_ep <- EP6_Survey_2006$duration
EP6_Survey_2006$months_served <- months_in_ep

# Monthly rates: each activity count divided by the number of months served
EP6_Survey_2006$questions_month <- EP6_Survey_2006$Questions / months_in_ep
EP6_Survey_2006$questions_qtime_month <-
  EP6_Survey_2006$parlq_vwquestions / months_in_ep # correct variable?
EP6_Survey_2006$reports_month <- EP6_Survey_2006$reports / months_in_ep
EP6_Survey_2006$opinions_month <- EP6_Survey_2006$opinions_vw / months_in_ep
EP6_Survey_2006$motions_month <- EP6_Survey_2006$motions_vw / months_in_ep

# Total monthly legislative activity: row-wise sum of the five monthly rates,
# skipping missing values
monthly_rates <- c(
  "questions_month", "questions_qtime_month", "reports_month",
  "opinions_month", "motions_month"
)
EP6_Survey_2006$leg_activity_month <- rowSums(
  EP6_Survey_2006[, monthly_rates],
  na.rm = TRUE
)

# Create age and age squared (copied from ageyr and agesq)
EP6_Survey_2006$age <- EP6_Survey_2006$ageyr
EP6_Survey_2006$age_squared <- EP6_Survey_2006$agesq

# Create female dummy variable: 'male' -> 0, 'female' -> 1, missing stays
# missing
female_spec <- "'male'=0; 'female'=1; NA=NA"
EP6_Survey_2006$female <- car::recode(EP6_Survey_2006$gender, female_spec)

# Recode "respondent" variable: 1 for rows that came with a survey entry,
# 0 for rows where the flag is missing
respondent_spec <- "1=1; NA=0"
EP6_Survey_2006$respondent <- car::recode(
  EP6_Survey_2006$respondent,
  respondent_spec
)

# Keep variables needed for regressions ---

# Reduce the merged data to the columns used in the regressions. The column
# order below is the column order of the resulting data frame.
EP6_final <- EP6_Survey_2006 %>%
  dplyr::select(
    # Survey flag, electoral system and party group variables
    respondent, incumbent, open_list, prop_rep, party_publish, party_group,
    seats, seat_share, seat_share_others0,
    # Positions held and country identifiers
    committee_chair, partypos, country, Country,
    # Binary campaign indicators
    use_telephone, use_doortodoor, use_partymeetings, use_mailing,
    use_publicmeetings, use_pressconferences, use_mediarelations,
    use_fundraising, use_website, use_email, use_blog,
    use_newsletters, use_posters, use_stickers, use_radioads, use_tvads,
    use_newspaperads,
    # Additive campaign indices
    use_campaign, use_classic, use_postmodern, use_material,
    # Ordinal campaign items and their additive index
    use_ord_telephone, use_ord_doortodoor, use_ord_partymeetings,
    use_ord_mailing, use_ord_publicmeetings, use_ord_pressconferences,
    use_ord_mediarelations, use_ord_fundraising, use_ord_website,
    use_ord_mail, use_ord_blog, use_ord_total,
    # Legislative activity: counts, monthly rates and totals
    questions, questions_qtime, reports, opinions, vote_activity, motions_vw,
    questions_month, questions_qtime_month, reports_month, opinions_month,
    motions_month, leg_activity, leg_activity_month,
    # Demographics
    age, age_squared, female
  )




# Add versions of monthly legislative activity and of the number of reports
# that are rounded up to the next whole number
EP6_final <- dplyr::mutate(
  EP6_final,
  leg_activity_month_round = ceiling(leg_activity_month),
  reports_round = ceiling(reports)
)

# Save this object as a csv file which can be used to replicate all regressions
readr::write_csv(EP6_final, "EP6_final.csv")